library("openxlsx")

# Histogram of the ADdata value distribution on a log10 scale.
data1 <- read.xlsx("D:/浏览器下载/data for class3/newfile3.xlsx")
# Drop the first column (presumably the identifier/text column -- confirm
# against the workbook) so that only measurement columns remain.
data1 <- data1[, -1]
# Flatten all remaining columns into a single unnamed vector.
data2 <- unlist(data1, use.names = FALSE)
# The data viewer is only useful when someone is looking at the session, so
# it is skipped when the script runs non-interactively.
if (interactive()) {
  View(data2)
}
# Remove exact zeros: log10(0) is -Inf and carries no information here.
data3 <- data2[!data2 %in% 0]
ADdata <- log(data3, base = 10)
hist(ADdata, col = 5)

# Histogram of the GSE67835 value distribution on a log10 scale.
data4 <- read.xlsx("D:/浏览器下载/data for class3/newfile4.xlsx")
# Drop the first column (presumably the identifier/text column -- confirm
# against the workbook) so that only measurement columns remain.
data4 <- data4[, -1]
# Flatten all remaining columns into a single unnamed vector.
data5 <- unlist(data4, use.names = FALSE)
# Convert to numeric BEFORE filtering zeros. When the values arrive as text,
# `%in% 0` only matches the exact string "0", so "0.0" or "0.00" would slip
# through the filter and turn into -Inf after log10().
data6 <- as.numeric(data5)
data7 <- data6[!data6 %in% 0]
GSE67835 <- log10(data7)
hist(GSE67835, col = 5)

# Volcano plot of `prostat`, written to a JPEG file.
# `prostat` is not created in this part of the script; the code below reads
# its columns FC (plotted as log2 fold change), P (p-value) and ID (label).
library("ggplot2")
library("ggrepel")

# Classify every row once: "up"/"down" when P < 0.05 and the linear fold
# change (2^FC) is above 1.2 or below 1/1.2, "non" otherwise. which() drops
# NA comparisons, so rows with a missing P or FC stay "non".
fold_change <- 2^prostat$FC
is_significant <- prostat$P < 0.05
prostat$threshold <- "non"
prostat$threshold[which(is_significant & fold_change > 1.2)] <- "up"
prostat$threshold[which(is_significant & fold_change < 1 / 1.2)] <- "down"

# Aesthetics use bare column names so that each layer resolves them against
# its own data (the label layer below works on a subset of the rows).
volcano_plot <- ggplot(
  data = prostat,
  aes(x = FC, y = -log10(P), colour = threshold)
) +
  xlab("log2 fold change") +
  ylab("-log10 P") +
  geom_point(size = 3, alpha = 0.6) +
  scale_color_manual(
    values = c("up" = "red", "down" = "blue", "non" = "grey")
  ) +
  # Dotted guide lines at the significance and fold-change cut-offs.
  geom_hline(yintercept = -log10(0.05), linetype = 3) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), linetype = 3) +
  # Label only the genes classified as "up" or "down".
  geom_text_repel(
    data = subset(prostat, threshold != "non"),
    aes(x = FC, y = -log10(P), label = ID),
    size = 3,
    box.padding = unit(0.5, "lines"),
    point.padding = unit(0.8, "lines"),
    segment.color = "black",
    show.legend = FALSE
  )

# Print explicitly: a ggplot object is only drawn when printed, and
# auto-printing does not happen when the script is run through source().
jpeg(filename = "火山图.jpg", width = 800, height = 800, units = "px")
print(volcano_plot)
dev.off()


  


library("openxlsx")

# Hierarchical clustering of the ADdata workbook, drawn as a dendrogram.
F2 <- read.xlsx("D:/浏览器下载/data for class3/newfile3.xlsx")
# Separate the first column (used as the column labels after transposing)
# from the measurement columns BEFORE transposing. Transposing the whole
# data frame would turn it into a character matrix, and numeric columns are
# passed through format() on the way, which keeps only the default number of
# significant digits.
row_ids <- F2[[1]]
F2 <- t(as.matrix(F2[, -1, drop = FALSE]))
colnames(F2) <- row_ids
# Each row of F2 is now one original column of the workbook; cluster those
# rows on Euclidean distance with average linkage.
d <- dist(F2)
F3 <- hclust(d, method = "average")
# hang = -1 aligns all leaf labels at the bottom of the dendrogram.
plot(F3, hang = -1, cex = 0.8)

    




